Saved the price column in a separate vector/dataframe called target_data. Moved all of the columns except the ID, date, price, yr_renovated, zipcode, lat, long, sqft_living15, and sqft_lot15 columns into a new data frame called train_data.
# Load the housing data and split it into the prediction target (price) and
# the feature columns used by the kNN model.
House_data <- read.csv("C:/Users/apoor/Desktop/Spring/Intro to Machine Learning/Practicum 1/Practicum1_2.csv", stringsAsFactors = FALSE)
head(House_data) # preview only; printing the full data frame floods the output

# Select columns by name rather than by position. The positional selection
# -c(1:3, 15:21) removed ten columns although only nine are meant to be
# excluded, so one feature was lost; naming the wanted features keeps all
# twelve and does not depend on the column order of the CSV.
feature_cols <- c(
  "bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors", "waterfront",
  "view", "condition", "grade", "sqft_above", "sqft_basement", "yr_built"
)
missing_cols <- setdiff(c("price", feature_cols), names(House_data))
if (length(missing_cols) > 0) {
  stop(
    "Columns not found in House_data: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

target_data <- House_data["price"] # one-column data frame of prices
head(target_data)
train_data <- House_data[feature_cols] # feature columns only
head(train_data)
Normalized all of the columns (except the boolean columns waterfront and view) using min-max normalization.
# Min-max normalize a numeric vector onto [0, 1].
#
# x      Numeric vector (for example one column of a data frame).
# na.rm  If TRUE, missing values are ignored when computing the minimum and
#        maximum and stay NA in the result. With the default FALSE any NA in
#        x makes the whole result NA, as before.
#
# Returns a vector of the same length as x. A vector whose values are all
# equal is mapped to zeros instead of NaN (0 / 0).
normalize <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  lo <- min(x, na.rm = na.rm)
  span <- max(x, na.rm = na.rm) - lo
  if (isTRUE(span == 0)) {
    # Zero range: every value equals the minimum, so the scaled value is 0.
    return(x - lo)
  }
  (x - lo) / span
}
# Min-max normalize every feature except the boolean columns waterfront and
# view. Those two columns are carried over unchanged instead of being dropped,
# so the normalized frame keeps the same columns, in the same order, as
# train_data and a new case can be matched to it position by position.
unscaled_cols <- c("waterfront", "view")
scaled_cols <- setdiff(names(train_data), unscaled_cols)
train_data_n <- train_data
train_data_n[scaled_cols] <- lapply(train_data[scaled_cols], normalize)
head(train_data_n) # preview only
Built a function called knn.reg that implements a regression version of kNN that averages the prices of the k nearest neighbors. It must use the following signature:
knn.reg (new_data, target_data, train_data, k)
where new_data is a data frame with new cases, target_data is a data frame with a single column of prices from (2), train_data is a data frame with the features from (2) that correspond to a price in target_data, and k is the number of nearest neighbors to consider. It must return the predicted price.
# Min-max rescale the values of x onto [0, 1], relative to the minimum and
# maximum found in x itself.
#
# x      Numeric vector.
# na.rm  If TRUE, NA values are skipped when finding the range and remain NA
#        in the output. Default FALSE: any NA in x yields an all-NA result.
#
# Returns a vector as long as x. When all values of x are identical the
# range is zero; zeros are returned in that case rather than NaN.
normalize1 <- function(x, na.rm = FALSE) {
  if (length(x) == 0L) {
    return(numeric(0))
  }
  rng <- c(min(x, na.rm = na.rm), max(x, na.rm = na.rm))
  width <- rng[2] - rng[1]
  if (isTRUE(width == 0)) {
    return(x - rng[1])
  }
  (x - rng[1]) / width
}
# Euclidean distance between two observations.
#
# p, q  Numeric vectors, or one-row data frames / lists of numbers. Both are
#       flattened to plain numeric vectors first, so the result is always a
#       single unnamed number even when p is a data-frame row.
#
# Only the first length(p) entries of q are compared, matching the previous
# loop over the elements of p; if q is shorter than p the result is NA.
# Two empty inputs have distance 0.
distance3 <- function(p, q) {
  p <- as.numeric(unlist(p, use.names = FALSE))
  q <- as.numeric(unlist(q, use.names = FALSE))[seq_along(p)]
  sqrt(sum((p - q)^2))
}
# Euclidean distance from one new case to every row of the training data.
#
# train_data  Data frame or matrix of numeric features, one row per case.
# new_data    The new case: a numeric vector or a one-row data frame whose
#             values are in the same order as the columns of train_data.
#
# Returns a plain numeric vector with one distance per training row, in row
# order. All rows are processed at once with matrix arithmetic instead of
# indexing the data frame row by row.
#
# If new_data does not have exactly one value per training column, a warning
# is raised and only the first ncol(train_data) values are used (missing
# values become NA), which is how mismatched input was treated before.
neighbours3 <- function(train_data, new_data) {
  features <- as.matrix(train_data)
  query <- as.numeric(unlist(new_data, use.names = FALSE))
  if (length(query) != ncol(features)) {
    warning(
      "new_data has ", length(query), " values but train_data has ",
      ncol(features), " columns; using the first ", ncol(features),
      call. = FALSE
    )
  }
  query <- query[seq_len(ncol(features))]
  # Subtract the query from every row, then take the row-wise Euclidean norm.
  deltas <- sweep(features, 2, query)
  unname(sqrt(rowSums(deltas^2)))
}
# Indices of the k1 smallest distances, nearest first.
#
# neighbours3  Distances to the training cases: a numeric vector, or a
#              one-row data frame / list of distances (flattened first).
# k1           Number of neighbours wanted (non-negative).
#
# Returns an integer vector of positions into the distances. If k1 exceeds
# the number of distances, all positions are returned instead of padding the
# result with NA; k1 = 0 returns an empty vector.
k.closest2 <- function(neighbours3, k1) {
  distances <- as.numeric(unlist(neighbours3, use.names = FALSE))
  nearest_first <- order(distances)
  nearest_first[seq_len(min(k1, length(nearest_first)))]
}
# kNN regression: predict the price of a new case as the mean price of its
# k nearest training cases.
#
# new_data     The new case (numeric vector or one-row data frame) with its
#              features in the same order as the columns of train_data.
# target_data  Prices of the training cases: either a one-column data frame
#              or a plain numeric vector, one price per row of train_data.
# train_data   Data frame of features, scaled the same way as new_data.
# k            Number of nearest neighbours to average (1 <= k <= nrow).
#
# Returns the predicted price as a single number. Stops with an error when
# the prices do not line up with train_data or k is out of range.
knn.reg <- function(new_data, target_data, train_data, k) {
  # target_data[idx] on a data frame would select columns, not rows, so pull
  # the price column out before indexing by neighbour position.
  prices <- if (is.data.frame(target_data)) target_data[[1]] else target_data
  n_train <- nrow(train_data)
  stopifnot(
    is.numeric(prices),
    length(prices) == n_train,
    is.numeric(k),
    length(k) == 1L,
    k >= 1,
    k <= n_train
  )
  distances <- neighbours3(train_data, new_data)
  nearest <- k.closest2(distances, k)
  mean(prices[nearest])
}
Forecasted the price of this new home with regression kNN, using k = 4:
bedrooms = 4 | bathrooms = 3 | sqft_living = 4852 | sqft_lot = 9812 | floors = 3 | waterfront = 0 | view = 1 | condition = 3 | grade = 11
sqft_above = 1860 | sqft_basement = 820 | yr_built = 1962
# Feature values of the new home, named so they can be matched to the
# training columns by name.
Atr <- c(
  bedrooms = 4, bathrooms = 3, sqft_living = 4852, sqft_lot = 9812,
  floors = 3, waterfront = 0, view = 1, condition = 3, grade = 11,
  sqft_above = 1860, sqft_basement = 820, yr_built = 1962
)

# Scale the new home with the minimum and range of each *training* column so
# it lands on the same scale as train_data_n. Min-max scaling the vector
# across its own twelve values would mix units (bedrooms vs. square feet) and
# make it incomparable with the training rows.
features <- names(train_data_n)
stopifnot(all(features %in% names(Atr)))
unscaled_cols <- intersect(c("waterfront", "view"), features) # kept as-is
scaled_cols <- setdiff(features, unscaled_cols)
col_min <- vapply(
  train_data[scaled_cols], function(col) as.numeric(min(col)), numeric(1)
)
col_max <- vapply(
  train_data[scaled_cols], function(col) as.numeric(max(col)), numeric(1)
)
col_span <- col_max - col_min
col_span[col_span == 0] <- 1 # constant column: avoid dividing by zero

# Keep only the features present in train_data_n, in the same column order.
Atr_n <- Atr[features]
Atr_n[scaled_cols] <- (Atr[scaled_cols] - col_min) / col_span

Price <- knn.reg(Atr_n, target_data$price, train_data_n, 4)
Price # Price of the new house
## [1] 502500